use flate2::read::GzDecoder;
use flate2::write::GzEncoder;
use flate2::Compression;
use indexmap::IndexMap;
use std::fs::File;
use std::io::Write;
use std::io::{self, BufWriter};
use std::io::{BufRead, BufReader, Read};
use std::path::PathBuf;
use crate::error::GRangesError;
use crate::Position;
/// Reads a tab-delimited genome file of `seqname<TAB>length` rows into an
/// insertion-ordered map from sequence name to length.
///
/// The file is opened through [`InputStream`], so whatever input formats
/// `InputStream::reader` accepts are accepted here too. Columns after the
/// second one are ignored.
///
/// # Errors
///
/// Returns an error if the file cannot be opened or read, if a length fails
/// to parse as a [`Position`], if a line has no length column, or if the same
/// sequence name appears more than once
/// ([`GRangesError::InvalidGenomeFile`] for the last two cases).
pub fn read_seqlens(
    filepath: impl Into<PathBuf>,
) -> Result<IndexMap<String, Position>, GRangesError> {
    let input_file = InputStream::new(filepath);
    let reader = input_file.reader()?;

    let mut seqlens = IndexMap::new();
    for (index, result) in reader.lines().enumerate() {
        let line = result?;
        let mut columns = line.split('\t');

        // `split` always yields at least one item, but the length column is
        // absent on blank or truncated lines; report that instead of panicking.
        let seqname = columns.next().ok_or_else(|| {
            GRangesError::InvalidGenomeFile(format!(
                "line {} is missing the sequence name column",
                index + 1
            ))
        })?;
        let length: Position = columns
            .next()
            .ok_or_else(|| {
                GRangesError::InvalidGenomeFile(format!(
                    "line {} is missing the sequence length column",
                    index + 1
                ))
            })?
            .parse()?;

        // A previous value coming back from `insert` means the name was
        // already present; the partially built map is discarded on error.
        if seqlens.insert(seqname.to_string(), length).is_some() {
            return Err(GRangesError::InvalidGenomeFile(format!(
                "sequence '{}' is duplicated",
                seqname
            )));
        }
    }
    Ok(seqlens)
}
/// Reports whether the file at `file_path` starts with the two-byte gzip
/// magic number (`0x1f 0x8b`).
///
/// A file shorter than two bytes (including an empty file) cannot be gzip
/// data, so it yields `Ok(false)` rather than an error.
///
/// # Errors
///
/// Returns any I/O error from opening or reading the file, other than the
/// end-of-file condition described above.
fn is_gzipped_file(file_path: impl Into<PathBuf>) -> io::Result<bool> {
    let mut file = File::open(file_path.into())?;
    let mut magic = [0u8; 2];
    match file.read_exact(&mut magic) {
        Ok(()) => Ok(magic == [0x1f, 0x8b]),
        // `read_exact` signals a too-short file with `UnexpectedEof`.
        Err(err) if err.kind() == io::ErrorKind::UnexpectedEof => Ok(false),
        Err(err) => Err(err),
    }
}
/// A handle on an input file plus the metadata (comments, header, number of
/// leading lines to skip) gathered from its first lines.
#[derive(Clone, Debug)]
pub struct InputStream {
/// Path of the file to read.
pub filepath: PathBuf,
/// Comment lines gathered by `collect_metadata`; `None` until it has run.
pub comments: Option<Vec<String>>,
/// Header line found by `collect_metadata`, if any.
pub header: Option<String>,
/// Number of leading lines that `detect_columns` and `continue_reading`
/// skip; incremented by `collect_metadata` for each comment/header line.
pub skip_lines: usize,
}
impl InputStream {
    /// Creates a stream for `filepath` with no metadata collected and no
    /// lines to skip. The file is not opened here.
    pub fn new(filepath: impl Into<PathBuf>) -> Self {
        Self {
            filepath: filepath.into(),
            comments: None,
            header: None,
            skip_lines: 0,
        }
    }

    /// Opens the file and returns a buffered reader positioned at its start.
    ///
    /// If `is_gzipped_file` reports the file as gzip data, the reader
    /// decompresses it on the fly; otherwise the raw bytes are read.
    ///
    /// # Errors
    ///
    /// Returns any I/O error from opening the file or probing its format.
    pub fn reader(&self) -> io::Result<BufReader<Box<dyn Read>>> {
        let file = File::open(&self.filepath)?;
        let reader: Box<dyn Read> = if is_gzipped_file(&self.filepath)? {
            Box::new(GzDecoder::new(file))
        } else {
            Box::new(file)
        };
        Ok(BufReader::new(reader))
    }

    /// Scans the leading lines of the file for comments and an optional
    /// header line.
    ///
    /// Every leading line that starts with `comment` is stored (with trailing
    /// whitespace trimmed) in `self.comments`. Scanning stops at the first
    /// line that does not start with `comment`; if `header` is given and that
    /// line starts with it, the line is stored in `self.header`. Because the
    /// comment test runs first, a header line that also starts with `comment`
    /// is recorded as a comment, not as the header.
    ///
    /// `self.skip_lines` is incremented (not reset) once per comment line and
    /// once for the header, so calling this more than once on the same value
    /// counts the lines again.
    ///
    /// Returns `true` if `self.skip_lines` is non-zero afterwards.
    ///
    /// # Errors
    ///
    /// Returns any I/O error from opening or reading the file.
    pub fn collect_metadata(&mut self, comment: &str, header: Option<&str>) -> io::Result<bool> {
        let mut buf_reader = self.reader()?;
        let mut comments = Vec::new();
        let mut line = String::new();

        while buf_reader.read_line(&mut line)? > 0 {
            if line.starts_with(comment) {
                comments.push(line.trim_end().to_string());
                self.skip_lines += 1;
                line.clear();
                continue;
            }

            // First non-comment line: it is either the header or the start
            // of the data. Either way the metadata section is over, so stop
            // here even when no header was requested -- otherwise comment-like
            // lines inside the data would be counted into `skip_lines`.
            if let Some(header_string) = header {
                if line.starts_with(header_string) {
                    self.header = Some(line.trim_end().to_string());
                    self.skip_lines += 1;
                }
            }
            break;
        }

        self.comments = Some(comments);
        Ok(self.skip_lines > 0)
    }

    /// Returns the number of `delim`-separated fields in the first line after
    /// the `self.skip_lines` leading lines.
    ///
    /// If no such line exists, the empty string is split, which gives 1 for
    /// any non-empty `delim`.
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be opened or read.
    pub fn detect_columns(&mut self, delim: &str) -> Result<usize, GRangesError> {
        let mut buf_reader = self.reader()?;
        self.skip_metadata_lines(&mut buf_reader)?;

        // `read_line` appends, so the buffer must hold only this one line for
        // the field count to be meaningful.
        let mut line = String::new();
        buf_reader.read_line(&mut line)?;
        Ok(line.split(delim).count())
    }

    /// Opens a fresh reader and advances it past the `self.skip_lines`
    /// leading lines, so the next read returns the first line after them.
    ///
    /// # Errors
    ///
    /// Returns any I/O error from opening or reading the file.
    pub fn continue_reading(&self) -> io::Result<BufReader<Box<dyn Read>>> {
        let mut buf_reader = self.reader()?;
        self.skip_metadata_lines(&mut buf_reader)?;
        Ok(buf_reader)
    }

    /// Consumes and discards up to `self.skip_lines` lines from `reader`,
    /// stopping early at end of input.
    fn skip_metadata_lines(&self, reader: &mut BufReader<Box<dyn Read>>) -> io::Result<()> {
        let mut discarded = String::new();
        for _ in 0..self.skip_lines {
            if reader.read_line(&mut discarded)? == 0 {
                break;
            }
            discarded.clear();
        }
        Ok(())
    }
}
/// Where an `OutputStream` sends its data.
enum OutputDestination {
/// Write to the file at this path.
File(PathBuf),
/// Write to the process's standard output.
Stdout,
}
/// An output target (file or standard output) with optional header entries
/// that are emitted when a writer is created.
pub struct OutputStream {
/// The file or standard-output target that `writer` opens.
destination: OutputDestination,
/// Optional header entries; `writer` emits each one as a `#`-prefixed line.
pub header: Option<Vec<String>>,
}
impl OutputStream {
    /// Creates an output stream that writes to the file at `filepath`.
    ///
    /// The file is not created until [`OutputStream::writer`] is called.
    pub fn new(filepath: impl Into<PathBuf>, header: Option<Vec<String>>) -> Self {
        Self {
            destination: OutputDestination::File(filepath.into()),
            header,
        }
    }

    /// Creates an output stream that writes to standard output.
    pub fn new_stdout(header: Option<Vec<String>>) -> Self {
        Self {
            destination: OutputDestination::Stdout,
            header,
        }
    }

    /// Opens the destination and returns a buffered writer with the header
    /// entries already written.
    ///
    /// For a file destination the file is created with `File::create`; when
    /// the path's extension is `gz` the output is gzip-compressed at the
    /// default compression level. Each header entry is written as its own
    /// line prefixed with `#`.
    ///
    /// The returned writer is buffered, so callers should `flush()` it when
    /// done to observe any write error.
    ///
    /// # Errors
    ///
    /// Returns any I/O error from creating the file or writing the header.
    pub fn writer(&self) -> io::Result<Box<dyn Write>> {
        let mut writer: Box<dyn Write> = match &self.destination {
            OutputDestination::File(path) => {
                // `Path::ends_with` compares whole path components, so it
                // would never match a name like `out.tsv.gz`; inspect the
                // extension instead.
                let is_gzip = path.extension().map_or(false, |ext| ext == "gz");
                let file = File::create(path)?;
                if is_gzip {
                    Box::new(BufWriter::new(GzEncoder::new(
                        file,
                        Compression::default(),
                    )))
                } else {
                    Box::new(BufWriter::new(file))
                }
            }
            OutputDestination::Stdout => Box::new(BufWriter::new(io::stdout())),
        };

        if let Some(entries) = &self.header {
            for entry in entries {
                writeln!(writer, "#{}", entry)?;
            }
        }
        Ok(writer)
    }
}