csv_diff/
csv_hasher.rs

1use std::hash::Hasher;
2use xxhash_rust::xxh3::{xxh3_128, Xxh3};
3
/// Extension trait that derives 128-bit hashes from a CSV record.
///
/// Two digests are exposed: one over the whole record and one restricted to
/// the fields that form the record's key.
pub(crate) trait CsvHasherExt {
    /// Returns a 128-bit hash computed over the record as a whole.
    fn hash_record(&self) -> u128;

    /// Returns a 128-bit hash of the fields selected by `key_fields_idx`.
    ///
    /// `key_fields_idx` holds the positions of the key fields within the
    /// record. How positions that do not exist in the record are treated is
    /// up to the implementation.
    fn hash_key_fields(&self, key_fields_idx: &[usize]) -> u128;
}
9
10impl CsvHasherExt for csv::ByteRecord {
11    #[inline]
12    fn hash_key_fields(&self, key_fields_idx: &[usize]) -> u128 {
13        let mut hasher = Xxh3::new();
14        let key_fields = key_fields_idx.iter().filter_map(|k_idx| self.get(*k_idx));
15
16        // TODO: try to do it with as few calls to `write` as possible
17        // in order to still be efficient and do as few `write` calls as possible
18        // consider using `csv_record.range(...)` method
19        for key_field in key_fields {
20            hasher.write(key_field);
21        }
22        hasher.digest128()
23    }
24
25    #[inline]
26    fn hash_record(&self) -> u128 {
27        // TODO: don't hash all of it -> exclude the key fields
28        // in order to still be efficient and do as few `write` calls as possible
29        // consider using `csv_record.range(...)` method
30        xxh3_128(self.as_slice())
31    }
32}