use csv::{ReaderBuilder, Terminator};
use std::collections::HashMap;
/// A batch of rows. Despite the name, this is a `Vec`: each element is one row
/// mapping a field name to its cell value, where `None` is what `read_csv`
/// stores for a cell that is the literal `NULL` or is missing from the record.
type StringMap = Vec<HashMap<String, Option<String>>>;
pub fn read_file<T>(
csv_file: &str,
delimiter: char,
has_header: bool,
) -> std::io::Result<(Vec<T>, usize)>
where
T: serde::de::DeserializeOwned,
{
let mut rdr = csv::ReaderBuilder::new()
.has_headers(has_header)
.delimiter(delimiter as u8)
.double_quote(true)
.escape(Some(b'\\'))
.flexible(true) .from_path(csv_file)?;
let headers = if has_header {
Some(rdr.byte_headers()?.clone())
} else {
None
};
let mut count = 0;
let mut list: Vec<T> = vec![];
let mut raw_record = csv::ByteRecord::new();
while rdr.read_byte_record(&mut raw_record)? {
log::info!("raw_record: {:?}", raw_record);
let result = match headers {
Some(ref headers) => raw_record.deserialize::<T>(Some(headers)),
None => raw_record.deserialize::<T>(None),
};
match result {
Ok(record) => {
list.push(record);
count += 1;
}
Err(e) => {
count += 1;
log::error!("read_csv_error: error={:?}", e);
}
}
}
Ok((list, count as usize))
}
pub fn read_csv(
file_path: &str,
fields: Vec<&str>,
batch_size: usize,
handler: &dyn Fn(&StringMap),
) -> Result<(), anyhow::Error> {
let mut builder = ReaderBuilder::new();
builder
.double_quote(false)
.comment(Some(b'-'))
.has_headers(false)
.terminator(Terminator::CRLF);
let mut reader = builder.from_path(file_path)?;
let mut map_list = vec![];
let mut total_count = 0;
for line in reader.records().flatten() {
let mut curr_row = HashMap::new();
for (i, field) in fields.iter().enumerate() {
if let Some(cell) = line.get(i) {
if cell == "NULL" {
curr_row.insert(field.to_string(), None);
} else {
curr_row.insert(field.to_string(), Some(cell.to_string()));
}
} else {
curr_row.insert(field.to_string(), None);
}
}
map_list.push(curr_row);
total_count += 1;
if total_count % batch_size == 0 {
handler(&map_list);
map_list.clear();
}
}
handler(&map_list);
log::info!(
"read_csv: total_count={}, map_size={}",
total_count,
map_list.len()
);
Ok(())
}