use std::{
fmt,
io::{BufReader, Read},
str::{from_utf8, Utf8Error},
};
use crate::{csv::Sep, schemas::LineParser};
use bytelines::ByteLines;
use std::result::Result;
use crate::schemas::LiteralLineParser;
/// Metadata extracted from the header at the top of a filing.
///
/// Populated by either the legacy (`key = value` block) parser or the
/// non-legacy (single delimited line) parser in this file.
#[derive(Debug, Default, Clone)]
pub struct Header {
/// Format version: the `FEC_Ver_#` value of a legacy header, or the version
/// field taken from the first line of a non-legacy header.
pub fec_version: String,
/// Name of the software that produced the file (`Soft_Name` / `soft_name`).
pub software_name: String,
/// Version of that software (`Soft_Ver#` / `soft_ver`). The legacy parser
/// rejects headers without it; the non-legacy parser leaves it `None` when absent.
pub software_version: Option<String>,
/// `report_id` value of a non-legacy header; the legacy parser never sets it.
pub report_id: Option<String>,
/// `report_number` value of a non-legacy header; the legacy parser never sets it.
pub report_number: Option<String>,
}
/// Error returned when the header of a filing cannot be parsed.
#[derive(Debug, Clone)]
pub struct HeaderParseError {
    /// Human-readable description of what went wrong.
    pub message: String,
    /// Raw bytes that had been read from the input when the failure occurred.
    pub read: Vec<u8>,
}

impl fmt::Display for HeaderParseError {
    /// Renders the message followed by the bytes read so far, decoded lossily
    /// as UTF-8 so that invalid sequences cannot make formatting fail.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let consumed = String::from_utf8_lossy(&self.read);
        write!(
            f,
            "HeaderParseError: {} (read: '{}')",
            self.message, consumed
        )
    }
}

impl std::error::Error for HeaderParseError {}
/// Result of successfully parsing a header: the extracted metadata plus the
/// field separator reported for the file.
#[derive(Debug, Clone)]
pub struct HeaderParsing {
/// The parsed header fields.
pub header: Header,
/// Field separator. The legacy parser always reports `Sep::Comma`; the
/// non-legacy parser reports whatever `Sep::detect` finds on the first line.
pub sep: Sep,
}
/// Byte-line iterator from the `bytelines` crate over a buffered reader of `R`;
/// this is the line source handed between the header-parsing helpers.
type Lines<R> = bytelines::ByteLinesIter<BufReader<R>>;
/// Reads and parses the header found at the start of `src`.
///
/// The first line selects the format: when it contains `/*` the header is
/// treated as a legacy multi-line `key = value` block, otherwise the first
/// line itself is parsed as a delimited (non-legacy) header record.
///
/// # Errors
///
/// Returns a [`HeaderParseError`] holding the failure message together with
/// the bytes consumed from `src` so far (lines joined with `\n`).
pub fn parse_header(src: &mut impl Read) -> Result<HeaderParsing, HeaderParseError> {
    // NOTE(review): the 1-byte buffer presumably stops the reader from
    // consuming input beyond the header lines, so the caller can keep reading
    // `src` afterwards — confirm before changing the capacity.
    let reader = BufReader::with_capacity(1, src);
    let mut lines = ByteLines::new(reader).into_iter();
    let mut read_bytes = Vec::new();

    let first_line = match next_line(&mut read_bytes, &mut lines) {
        Ok(line) => line,
        Err(reason) => {
            return Err(HeaderParseError {
                message: reason.to_string(),
                read: read_bytes,
            });
        }
    };

    let outcome = if byte_slice_contains(&first_line, b"/*") {
        parse_legacy_header(&mut lines, &mut read_bytes)
    } else {
        parse_nonlegacy_header(&first_line)
    };

    // Attach everything read so far to the error for diagnostics.
    outcome.map_err(|message| HeaderParseError {
        message,
        read: read_bytes,
    })
}
/// Parses the body of a legacy header made of `key = value` lines.
///
/// Lines are pulled from `lines` (each one is also appended to `read_bytes`
/// by `next_line`) until a line containing `/*` is met, which ends the header.
/// Lines mentioning `schedule_counts` (case-insensitive) are skipped; every
/// other line must be a single `key = value` pair. Only the `FEC_Ver_#`,
/// `Soft_Name` and `Soft_Ver#` keys (matched case-insensitively) are kept.
///
/// # Errors
///
/// Returns a message when the input ends before the terminator, when more
/// than 100 lines precede it, when a line is not a valid pair, or when any of
/// the three recognised keys is missing (or, for the first two, empty).
fn parse_legacy_header(
    lines: &mut Lines<impl Read>,
    read_bytes: &mut Vec<u8>,
) -> Result<HeaderParsing, String> {
    // Maximum number of non-terminator lines accepted in the header body.
    const MAX_LINES: usize = 100;

    log::debug!("parsing legacy header");
    let mut header = Header::default();
    let mut seen = 0usize;

    loop {
        let raw = next_line(read_bytes, lines)?;
        if byte_slice_contains(&raw, b"/*") {
            break;
        }

        seen += 1;
        if seen > MAX_LINES {
            return Err(format!("more than {} lines in header", MAX_LINES));
        }

        let text = byte_slice_to_string(&raw);
        if text.to_lowercase().contains("schedule_counts") {
            continue;
        }

        let (key, value) = parse_legacy_kv(&text)?;
        let key = key.to_lowercase();
        if key == "fec_ver_#" {
            header.fec_version = value;
        } else if key == "soft_name" {
            header.software_name = value;
        } else if key == "soft_ver#" {
            header.software_version = Some(value);
        }
    }

    // Report the first missing required field, in a fixed order.
    let missing = if header.fec_version.is_empty() {
        Some("missing FEC_Ver_#")
    } else if header.software_name.is_empty() {
        Some("missing Soft_Name")
    } else if header.software_version.is_none() {
        Some("missing Soft_Ver#")
    } else {
        None
    };

    match missing {
        Some(what) => Err(what.to_string()),
        None => Ok(HeaderParsing {
            header,
            sep: Sep::Comma,
        }),
    }
}
/// Splits a legacy header line of the form `key = value` into its two parts.
///
/// Both parts are trimmed of surrounding whitespace. The line must contain
/// exactly one `=`.
///
/// # Errors
///
/// Returns a message when the line has no `=` at all, or when it has more
/// than one. The two cases are reported separately so that the message
/// matches the actual problem.
fn parse_legacy_kv(line: &str) -> std::result::Result<(String, String), String> {
    let mut parts = line.split('=');
    // `split` always yields at least one piece; a second piece exists only if
    // an `=` was present, and a third only if there were two or more.
    match (parts.next(), parts.next(), parts.next()) {
        (Some(key), Some(value), None) => Ok((key.trim().to_string(), value.trim().to_string())),
        (_, None, _) => Err(format!("missing '=' in header k=v line: {:?}", line)),
        _ => Err(format!("more than one '=' in header k=v line: {:?}", line)),
    }
}
fn parse_nonlegacy_header(line: &Vec<u8>) -> Result<HeaderParsing, String> {
log::debug!("parsing non-legacy header");
let mut header = Header::default();
let sep = Sep::detect(line);
log::debug!("separator: {:?}", sep);
let parts: Result<Vec<&str>, Utf8Error> =
line.split(|c| *c == sep.to_byte()).map(from_utf8).collect();
let parts = parts.map_err(|e| e.to_string())?;
if parts.len() < 2 {
return Err(format!("less than 2 parts in header: {:?}", parts));
}
let version = match parts[1] {
"FEC" => {
if parts.len() < 3 {
return Err(format!("less than 3 parts in header: {:?}", parts));
}
parts[2]
}
_ => parts[1],
};
let string_parts = parts.iter().map(|s| s.to_string()).collect::<Vec<String>>();
let record = LiteralLineParser
.parse_line(version, &mut string_parts.iter())
.map_err(|e| e.to_string())?;
header.fec_version = version.to_string();
header.software_name = record
.get_value("soft_name")
.ok_or("missing soft_name")?
.to_string();
header.software_version = record.get_value("soft_ver").map(|s| s.to_string());
header.report_id = record.get_value("report_id").map(|s| s.to_string());
header.report_number = record.get_value("report_number").map(|s| s.to_string());
Ok(HeaderParsing { header, sep })
}
fn next_line(
read_bytes: &mut Vec<u8>,
lines: &mut Lines<impl Read>,
) -> Result<Vec<u8>, &'static str> {
let line = match lines.next() {
None => return Err("unexpected end of file"),
Some(Ok(line)) => line,
Some(Err(_e)) => return Err("error reading line"),
};
if read_bytes.len() > 0 {
read_bytes.push(b'\n');
}
read_bytes.extend_from_slice(&line);
Ok(line)
}
/// Returns `true` when `needle` occurs as a contiguous run inside `haystack`.
///
/// An empty `needle` is considered to be contained in every haystack
/// (including an empty one), mirroring `str::contains("")`.
fn byte_slice_contains(haystack: &[u8], needle: &[u8]) -> bool {
    // `slice::windows` panics when asked for zero-sized windows, so the empty
    // needle has to be answered before reaching it.
    if needle.is_empty() {
        return true;
    }
    haystack
        .windows(needle.len())
        .any(|window| window == needle)
}
/// Converts `bytes` to an owned `String`, replacing any invalid UTF-8
/// sequences with U+FFFD instead of failing.
fn byte_slice_to_string(bytes: &[u8]) -> String {
    let decoded = String::from_utf8_lossy(bytes);
    decoded.into_owned()
}