use crate::util::oarfish_types::ShortReadRecord;
use anyhow::bail;
use csv::ReaderBuilder;
use std::collections::{HashMap, HashSet};
use std::fs::File;
use tracing::warn;
pub fn read_short_quant_vec(
short_read_path: &str,
txps_name: &[String],
) -> anyhow::Result<Vec<f64>> {
let file = File::open(short_read_path)?;
let mut rdr = ReaderBuilder::new()
.has_headers(true)
.delimiter(b'\t')
.from_reader(file);
let records: HashMap<String, ShortReadRecord> = rdr
.deserialize()
.collect::<Result<Vec<ShortReadRecord>, csv::Error>>()
.unwrap_or_else(|err| {
eprintln!("Failed to deserialize CSV records: {}", err);
std::process::exit(1);
})
.into_iter()
.map(|rec| (rec.name.clone(), rec))
.collect();
{
let txps_name_set: HashSet<&str> = txps_name.iter().map(|x| x.as_str()).collect();
if !records
.iter()
.all(|(k, _v)| txps_name_set.contains(k.as_str()))
{
bail!("There were transcripts in the short read quantification file that didn't appear in the BAM header; cannot proceed.");
}
}
let mut num_missing = 0;
let ordered_rec: Vec<f64> = txps_name
.iter()
.map(|name| {
records.get(name).map_or_else(
|| {
num_missing += 1;
0_f64
},
|rec| rec.num_reads,
)
})
.collect();
if num_missing > 0 {
warn!("There were {} transcripts appearing in the BAM header but missing from the short read quatifications; they have been assumed to have 0 abunance.", num_missing);
}
Ok(ordered_rec)
}