use std::collections::HashMap;
use std::io::{self, BufRead, BufReader, BufWriter, Write};
use std::path::Path;
use rsomics_common::{Result, RsomicsError};
/// Replaces the `#`-prefixed header of a line-oriented text stream with the
/// contents of `header_file`.
///
/// The replacement header is written first, one output line per non-blank
/// line of `header_file`. The body of `input` follows: every line starting
/// with `#` (the old header) and every blank line is dropped; each remaining
/// line is written with its trailing `\r`/`\n` characters stripped and a
/// single `\n` appended.
///
/// Blank lines in `header_file` are skipped and trailing `\r` characters are
/// stripped from its lines, i.e. the header receives the same normalisation
/// as the body. Without this, a stray empty line in the header file (for
/// example at its very end) would be emitted between the header and the first
/// record.
///
/// Returns the number of body lines written.
///
/// # Errors
///
/// Returns [`RsomicsError::Io`] if `header_file` cannot be read as UTF-8
/// text, or if reading from `input` or writing to `output` fails.
pub fn reheader_replace(
    input: &mut dyn io::Read,
    header_file: &Path,
    output: &mut dyn io::Write,
) -> Result<u64> {
    let new_header = std::fs::read_to_string(header_file).map_err(RsomicsError::Io)?;
    let mut out = BufWriter::new(output);

    for header_line in new_header.lines() {
        // `lines()` already removes `\n` / `\r\n`; drop any leftover `\r` so the
        // header is normalised exactly like the body lines below.
        let header_line = header_line.trim_end_matches('\r');
        if header_line.is_empty() {
            continue;
        }
        out.write_all(header_line.as_bytes())
            .map_err(RsomicsError::Io)?;
        out.write_all(b"\n").map_err(RsomicsError::Io)?;
    }

    let mut reader = BufReader::new(input);
    // One buffer reused for every line to avoid a per-line allocation.
    let mut line = String::new();
    let mut records: u64 = 0;
    loop {
        line.clear();
        let n = reader.read_line(&mut line).map_err(RsomicsError::Io)?;
        if n == 0 {
            // End of input.
            break;
        }
        let trimmed = line.trim_end_matches(['\n', '\r']);
        // Blank lines and the old header are both discarded.
        if trimmed.is_empty() || trimmed.starts_with('#') {
            continue;
        }
        out.write_all(trimmed.as_bytes())
            .map_err(RsomicsError::Io)?;
        out.write_all(b"\n").map_err(RsomicsError::Io)?;
        records += 1;
    }

    // Flush explicitly: dropping a `BufWriter` would swallow a write error.
    out.flush().map_err(RsomicsError::Io)?;
    Ok(records)
}
/// Parses a sample-renaming file into either a positional name list or an
/// `old name -> new name` map.
///
/// Every line is trimmed and blank lines are ignored. A remaining line is
/// split at its first whitespace character:
///
/// - no whitespace: the whole line is appended to the positional list;
/// - otherwise: the part before the whitespace is the old name and everything
///   after it (leading whitespace removed) is the new name; the pair is
///   inserted into the map, a later duplicate old name replacing an earlier
///   one.
///
/// If at least one mapping line was seen, the result is `(empty Vec, map)`
/// and any single-name lines are discarded. Otherwise the result is
/// `(positional names, empty map)`.
///
/// # Errors
///
/// Returns [`RsomicsError::Io`] if `path` cannot be read as UTF-8 text.
fn parse_samples_file(path: &Path) -> Result<(Vec<String>, HashMap<String, String>)> {
    let content = std::fs::read_to_string(path).map_err(RsomicsError::Io)?;
    let mut positional: Vec<String> = Vec::new();
    let mut renames: HashMap<String, String> = HashMap::new();

    for line in content.lines().map(str::trim).filter(|l| !l.is_empty()) {
        // `line` is trimmed and non-empty, so when it contains whitespace both
        // halves of the split are guaranteed to hold at least one
        // non-whitespace character; no emptiness checks are needed.
        match line.split_once(char::is_whitespace) {
            Some((old_name, rest)) => {
                renames.insert(old_name.to_owned(), rest.trim_start().to_owned());
            }
            None => positional.push(line.to_owned()),
        }
    }

    // A single mapping line switches the whole file to map mode.
    if renames.is_empty() {
        Ok((positional, renames))
    } else {
        Ok((Vec::new(), renames))
    }
}
/// Copies `input` to `output`, renaming the sample columns of any line that
/// starts with `#CHROM`.
///
/// The renaming rules come from `samples_file` (see `parse_samples_file`):
///
/// - when the name map is non-empty, each sample column whose current name is
///   a key of the map is replaced by the mapped name; other columns are kept;
/// - otherwise the `i`-th sample column is replaced by the `i`-th positional
///   name, and columns beyond the end of the positional list are kept.
///
/// A `#CHROM` line is split on tabs; its first nine fields are copied as-is
/// and every field after them is treated as a sample name. A `#CHROM` line
/// with nine or fewer fields is written unchanged.
///
/// All lines have trailing `\r`/`\n` stripped and are written with a single
/// `\n`; blank lines are dropped.
///
/// Returns the number of non-blank lines written that do not start with `#`.
///
/// # Errors
///
/// Returns an error if `samples_file` cannot be parsed, or
/// [`RsomicsError::Io`] if reading `input` or writing `output` fails.
pub fn reheader_samples(
    input: &mut dyn io::Read,
    samples_file: &Path,
    output: &mut dyn io::Write,
) -> Result<u64> {
    /// Writes `text` followed by a single newline.
    fn emit_line(sink: &mut impl Write, text: &str) -> io::Result<()> {
        sink.write_all(text.as_bytes())?;
        sink.write_all(b"\n")
    }

    let (by_position, by_name) = parse_samples_file(samples_file)?;
    let mut sink = BufWriter::new(output);
    let mut source = BufReader::new(input);
    let mut buf = String::new();
    let mut data_lines: u64 = 0;

    while source.read_line(&mut buf).map_err(RsomicsError::Io)? != 0 {
        let row = buf.trim_end_matches(['\n', '\r']);
        if !row.is_empty() {
            // `Some(..)` only for a column-header line that actually has
            // sample columns; every other line is emitted verbatim.
            let rewritten: Option<String> = if row.starts_with("#CHROM") {
                let fields: Vec<&str> = row.split('\t').collect();
                if fields.len() > 9 {
                    let (fixed, samples) = fields.split_at(9);
                    let mut rebuilt = fixed.join("\t");
                    for (idx, old_name) in samples.iter().copied().enumerate() {
                        // The map takes precedence whenever it has entries;
                        // positional names are only consulted otherwise.
                        let new_name = if !by_name.is_empty() {
                            by_name.get(old_name).map_or(old_name, String::as_str)
                        } else {
                            by_position.get(idx).map_or(old_name, String::as_str)
                        };
                        rebuilt.push('\t');
                        rebuilt.push_str(new_name);
                    }
                    Some(rebuilt)
                } else {
                    None
                }
            } else {
                None
            };

            emit_line(&mut sink, rewritten.as_deref().unwrap_or(row))
                .map_err(RsomicsError::Io)?;

            if !row.starts_with('#') {
                data_lines += 1;
            }
        }
        buf.clear();
    }

    // Flush explicitly so that a failing final write is reported.
    sink.flush().map_err(RsomicsError::Io)?;
    Ok(data_lines)
}
/// Copies `input` to `output` line by line, normalising line endings.
///
/// Each line has its trailing `\r`/`\n` characters stripped and is written
/// followed by a single `\n`. Lines that are empty after stripping are
/// dropped.
///
/// Returns the number of lines written that do not start with `#`.
///
/// # Errors
///
/// Returns [`RsomicsError::Io`] if reading `input` or writing `output` fails.
pub fn passthrough(input: &mut dyn io::Read, output: &mut dyn io::Write) -> Result<u64> {
    /// Writes `text` followed by a single newline.
    fn emit_line(sink: &mut impl Write, text: &str) -> io::Result<()> {
        sink.write_all(text.as_bytes())?;
        sink.write_all(b"\n")
    }

    let mut sink = BufWriter::new(output);
    let mut source = BufReader::new(input);
    let mut buf = String::new();
    let mut data_lines: u64 = 0;

    while source.read_line(&mut buf).map_err(RsomicsError::Io)? != 0 {
        let row = buf.trim_end_matches(['\n', '\r']);
        if !row.is_empty() {
            emit_line(&mut sink, row).map_err(RsomicsError::Io)?;
            // Only lines that are not `#`-prefixed count towards the total.
            data_lines += u64::from(!row.starts_with('#'));
        }
        buf.clear();
    }

    // Flush explicitly so that a failing final write is reported.
    sink.flush().map_err(RsomicsError::Io)?;
    Ok(data_lines)
}