use super::*;
use std::fs::File;
use std::io::{BufRead, Seek};
use std::io::{SeekFrom};
#[cfg(not(feature = "mmap"))]
use std::io::{BufReader};
#[cfg(feature = "mmap")]
use std::io::Cursor;
#[cfg(feature = "mmap")]
use memmap::MmapOptions;
use std::str;
use std::vec::*;
use anyhow::Error;
use memchr::memchr2_iter;
use super::parser_core::{NwLine, ParserState, CoreData};
/// Parses the delimited text file at path `f` into a `T`, as directed by `params`.
///
/// The file is first scanned with `read_num_file_lines` to learn how many
/// readable lines it holds, then rewound. After the requested header lines are
/// skipped, `count_num_fields` determines the number of fields at that
/// position, and the data is pushed through the `ParserState` state machine one
/// reader buffer at a time until either the input is exhausted or the wanted
/// number of lines has been collected.
///
/// Settings read from `params`:
/// * `comments` - comment byte; `b'\n'` is used when it is unset.
/// * `skip_header` / `skip_footer` - lines to drop at the start / end
///   (0 when unset).
/// * `max_rows` - upper bound on the number of lines stored in the result.
/// * `delimiter` - `Delimiter::WhiteSpace` or `Delimiter::Any(byte)`.
/// * `usecols` - indices of the fields to keep, treated as zero-based here
///   (each one is incremented by one before being handed to the parser core);
///   every field is kept when unset.
///
/// # Errors
///
/// Returns an error when
/// * the file cannot be opened, memory-mapped (feature `mmap`), seeked or read,
/// * `skip_header`, `skip_footer`, or their sum is greater than or equal to the
///   number of readable lines,
/// * `usecols` holds more entries than there are fields, or an entry that is
///   greater than or equal to the number of fields,
/// * the parser state machine reports an error.
pub fn parse_txt<T>(f: &str, params: &ReaderParams) -> Result<T, Error>
where
    T: RawReaderParse,
{
    let file = File::open(f)?;
    // A newline byte stands in for the comment character when none was supplied.
    let cmt = params.comments.unwrap_or(b'\n');

    // SAFETY: the mapping is only read, and only for the duration of this call.
    // Soundness relies on the file not being modified or truncated by anyone
    // else while it is mapped.
    #[cfg(feature = "mmap")]
    let buffer = unsafe { MmapOptions::new().map(&file)? };
    #[cfg(not(feature = "mmap"))]
    let mut reader = BufReader::with_capacity(BUF_SIZE, file);
    #[cfg(feature = "mmap")]
    let mut reader = Cursor::new(&buffer[..]);

    // First pass: count the readable lines, then rewind for the real parse.
    let num_lines = read_num_file_lines(&mut reader, cmt);
    reader.seek(SeekFrom::Start(0))?;

    // Each skip value is validated on its own only when the caller set it, so
    // that the more specific error message wins over the combined one below.
    if matches!(params.skip_header, Some(n) if n >= num_lines) {
        return Err(format_err!(
            "Input for skip_header greater than the number of readable lines in the file"
        ));
    }
    let sk_h = params.skip_header.unwrap_or(0);

    if matches!(params.skip_footer, Some(n) if n >= num_lines) {
        return Err(format_err!(
            "Input for skip_footer greater than the number of readable lines in the file"
        ));
    }
    let sk_f = params.skip_footer.unwrap_or(0);

    if num_lines <= (sk_h + sk_f) {
        return Err(format_err!("Input for skip_footer and skip_header greater than or equal to the number of readable lines in the file"));
    }

    // Lines left after trimming header and footer, capped by `max_rows`.
    let available_lines = num_lines - sk_h - sk_f;
    let num_lines_read = match params.max_rows {
        Some(limit) => available_lines.min(limit),
        None => available_lines,
    };

    // For whitespace splitting a plain space is passed along as the delimiter byte.
    let (delim_ws, delim) = match &params.delimiter {
        Delimiter::WhiteSpace => (true, b' '),
        Delimiter::Any(b) => (false, *b),
    };

    let mut fln = 0;
    skip_header_lines(&mut reader, &mut fln, cmt, sk_h);

    // Count the fields from the current position, then return to it so the
    // parse below starts right after the skipped header.
    let current_pos = reader.stream_position()?;
    let tot_fields = count_num_fields(&mut reader, cmt, delim, delim_ws);
    reader.seek(SeekFrom::Start(current_pos))?;

    let cols = match &params.usecols {
        Some(wanted) => {
            if wanted.len() > tot_fields {
                return Err(format_err!("Input for usecols contains more values than the total number of fields {}", tot_fields));
            }
            // Indices are zero-based here, so `tot_fields` itself is already
            // out of range (it would become field `tot_fields + 1` below).
            if wanted.iter().any(|&col| col >= tot_fields) {
                return Err(format_err!("Input for usecols contains a value greater than or equal to the number of fields {}", tot_fields));
            }
            // Shift by one for the parser core.
            wanted.iter().map(|&col| col + 1).collect::<Vec<usize>>()
        }
        None => Vec::<usize>::new(),
    };

    // An empty column selection means "keep every field".
    let nfields = if cols.is_empty() { tot_fields } else { cols.len() };
    let mut results: T = T::new(nfields, num_lines_read);
    let mut core_data = CoreData::<T> {
        length: 0,
        offset: 0,
        cmt,
        delim_ws,
        delim,
        fln,
        cols: &cols,
        field_counter: 0,
        current_field: 0,
        tot_fields,
        results: &mut results,
    };
    let mut state = ParserState::NwLine(NwLine {});

    loop {
        let consumed = {
            // Propagate read failures instead of panicking.
            let buffer = reader.fill_buf()?;
            core_data.offset = 0;
            core_data.length = buffer.len();
            let mut newline = memchr2_iter(b'\n', b'\r', buffer);
            while core_data.offset < core_data.length {
                state = state.next(buffer, &mut newline, &mut core_data)?;
                if core_data.results.get_num_lines() == num_lines_read {
                    break;
                }
            }
            core_data.length
        };
        reader.consume(consumed);
        // An empty buffer from `fill_buf` is the end-of-input signal; a buffer
        // that is merely shorter than the reader capacity is not.
        if consumed == 0 || core_data.results.get_num_lines() == num_lines_read {
            break;
        }
    }
    Ok(results)
}