1use std::collections::HashMap;
2use std::io::{self, BufReader};
3use std::fs::File;
4use std::path::Path;
5use std::hash::Hash;
6use serde::de::DeserializeOwned;
7use super::{Location, EngineOptions};
8
9
10pub(crate) type Index<K> = HashMap<K, Location>;
11pub(crate) fn make_index<P, K, V, F>(path: P, opts: EngineOptions, indexing_key_fn: F) -> io::Result<Index<K>>
12 where
13 P: AsRef<Path>,
14 K: Hash + Eq,
15 V: DeserializeOwned,
16 F: FnMut(V) -> Option<K>,
17{
18 let reader = make_csv_reader(&path, opts)?;
19 let index = reader.into_records()
20 .flatten()
21 .scan(indexing_key_fn, |f, value| extract_key_location_pair(value, f))
22 .collect();
23 Ok(index)
24}
25
26
27type CSVBufFileReader = csv::Reader<BufReader<File>>;
28fn make_csv_reader<P: AsRef<Path>>(path: P, opts: EngineOptions) -> io::Result<CSVBufFileReader> {
29 let file = File::open(path)?;
30 let reader = BufReader::with_capacity(opts.buf_capacity, file);
31 let csv_reader = csv::ReaderBuilder::new()
32 .delimiter(opts.delimiter)
33 .has_headers(false)
34 .from_reader(reader);
35 Ok(csv_reader)
36}
37
38
39fn extract_key_location_pair<K, V, F>(rec: csv::StringRecord, indexing_key_fn: &mut F) -> Option<(K, Location)>
40 where
41 K: Hash + Eq,
42 V: DeserializeOwned,
43 F: FnMut(V) -> Option<K>,
44{
45 const DELIMITER_LEN: usize = 1;
46 let offset = rec.position()?.byte();
47 let len = rec.as_slice().len() + (rec.len() - 1) * DELIMITER_LEN;
48 let loc = Location { offset, len };
49 let key = rec.deserialize(None).ok().and_then(|v| indexing_key_fn(v))?;
50 Some((key, loc))
51}