csvquery/
index.rs

1use std::collections::HashMap;
2use std::io::{self, BufReader};
3use std::fs::File;
4use std::path::Path;
5use std::hash::Hash;
6use serde::de::DeserializeOwned;
7use super::{Location, EngineOptions};
8
9
10pub(crate) type Index<K> = HashMap<K, Location>;
11pub(crate) fn make_index<P, K, V, F>(path: P, opts: EngineOptions, indexing_key_fn: F) -> io::Result<Index<K>> 
12    where 
13        P: AsRef<Path>,
14        K: Hash + Eq,
15        V: DeserializeOwned,
16        F: FnMut(V) -> Option<K>,
17{
18    let reader = make_csv_reader(&path, opts)?;
19    let index = reader.into_records()
20        .flatten()
21        .scan(indexing_key_fn, |f, value| extract_key_location_pair(value, f))
22        .collect();
23    Ok(index)
24}
25
26
27type CSVBufFileReader = csv::Reader<BufReader<File>>;
28fn make_csv_reader<P: AsRef<Path>>(path: P, opts: EngineOptions) -> io::Result<CSVBufFileReader> {
29    let file = File::open(path)?;
30    let reader = BufReader::with_capacity(opts.buf_capacity, file);
31    let csv_reader = csv::ReaderBuilder::new()
32        .delimiter(opts.delimiter)
33        .has_headers(false)
34        .from_reader(reader);
35    Ok(csv_reader)
36}
37
38
39fn extract_key_location_pair<K, V, F>(rec: csv::StringRecord, indexing_key_fn: &mut F) -> Option<(K, Location)>
40    where 
41        K: Hash + Eq,
42        V: DeserializeOwned,
43        F: FnMut(V) -> Option<K>,
44{
45    const DELIMITER_LEN: usize = 1;
46    let offset = rec.position()?.byte();
47    let len = rec.as_slice().len() + (rec.len() - 1) * DELIMITER_LEN;
48    let loc = Location { offset, len };
49    let key = rec.deserialize(None).ok().and_then(|v| indexing_key_fn(v))?;
50    Some((key, loc))
51}