Struct csv_diff::csv_diff::CsvByteDiff
source · pub struct CsvByteDiff<T: CsvHashTaskSpawner> { /* private fields */ }
Expand description
Compare two CSVs with each other lazily (for the eager, blocking variant, see CsvByteDiffLocal).
Use this instead of CsvByteDiffLocal when:
- you own your CSV data and you want to use an Iterator for the differences, so you don’t have to read all differences into memory
- your CSV data structure does not support Seek.
By default, CsvByteDiff
uses a rayon thread-pool to compare differences.
If you already have an existing rayon thread-pool that you want to use for CsvByteDiff, you can construct it with a builder (see also rayon_thread_pool on CsvByteDiffBuilder for using an existing rayon thread-pool when creating CsvByteDiff).
Example: create CsvByteDiff
with default values and compare two CSVs byte-wise lazily
// Example: build a `CsvByteDiff` with default settings and lazily pull the
// first difference between two in-memory CSVs.
//
// NOTE(review): the `?` operators and the trailing `Ok(())` imply that this
// snippet runs inside a function returning a `Result`; that wrapper is not
// shown here.
use csv_diff::{csv_diff::CsvByteDiff, csv::Csv};
use csv_diff::diff_row::{ByteRecordLineInfo, DiffByteRecord};
use std::collections::HashSet;
use std::iter::FromIterator;

// Some CSV data with a header, where the first column is a unique id.
// Both data rows share the id "a" and differ only in the third column.
let csv_left = "\
    header1,header2,header3\n\
    a,b,c";
let csv_right = "\
    header1,header2,header3\n\
    a,b,d";

// Construction is fallible, hence the `?`.
let csv_diff = CsvByteDiff::new()?;

// `diff` hands back an iterator over the differences instead of a fully
// materialized collection; here the CSVs are read from byte slices.
let mut diff_iterator = csv_diff.diff(
    Csv::with_reader(csv_left.as_bytes()),
    Csv::with_reader(csv_right.as_bytes()),
);

// Take the first difference. The outer `?` handles "no difference at all",
// the inner `?` handles an error reported by the iterator item itself.
// `ok_or_else` builds the error message only when it is actually needed.
let diff_row_actual = diff_iterator
    .next()
    .ok_or_else(|| "Expected a difference between the two CSVs, but got none".to_string())??;

// The rows with id "a" differ in the field at index 2 ("c" vs "d"), so a
// single `Modify` is expected.
// NOTE(review): the trailing `2` passed to `ByteRecordLineInfo::new` is
// presumably the line number of the record in its CSV; confirm against the
// `ByteRecordLineInfo` docs.
let diff_row_expected = DiffByteRecord::Modify {
    delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["a", "b", "c"]), 2),
    add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["a", "b", "d"]), 2),
    field_indices: vec![2],
};

assert_eq!(diff_row_actual, diff_row_expected);
Ok(())