use std::io;
use std::io::Write;
use std::path::PathBuf;
use crate::agg;
use crate::agg::Aggregator;
use crate::errors;
use crate::taxon;
use crate::taxon::TaxonId;
use crate::tree;
// NOTE: structopt turns the doc comments below into the `--help` text of the
// command (kept verbatim because of `verbatim_doc_comment`), so they are
// written for the end user of the command line tool.
/// Aggregates a TSV stream of (k-mer, taxon ID) pairs into one taxon per k-mer
///
/// Reads tab-separated lines holding a k-mer and a taxon ID from standard
/// input. Consecutive lines sharing the same k-mer are combined, and for each
/// such group a single tab-separated line with the k-mer, the aggregated taxon
/// ID and the rank of that taxon is written to standard output. Groups for
/// which no aggregate can be computed produce no output.
#[derive(Debug, StructOpt)]
#[structopt(verbatim_doc_comment)]
#[allow(clippy::tabs_in_doc_comments)]
pub struct JoinKmers {
    /// Path to the taxa file from which the taxonomy is loaded
    #[structopt(parse(from_os_str))]
    pub taxon_file: PathBuf,
}
/// Implements the `joinkmers` command.
///
/// Reads headerless, tab-separated `(k-mer, taxon ID)` records from standard
/// input and treats every run of consecutive records with an equal k-mer as one
/// group. Each taxon ID is looked up in the snapping built with `false`;
/// records whose lookup yields `None` contribute nothing to their group. The
/// remaining taxa of a group are counted with weight `1.0` and aggregated by a
/// `MixCalculator` constructed with `0.95`. The aggregate is then looked up in
/// the snapping built with `true`, and a `k-mer<TAB>taxon<TAB>rank` line is
/// written to standard output.
///
/// A group whose aggregation returns an error is skipped without any output.
///
/// # Errors
///
/// Returns an error when the taxa file cannot be read, when a record cannot be
/// deserialized, or when writing to standard output fails.
///
/// # Panics
///
/// Panics when the lookup of the aggregate in the `true` snapping or the lookup
/// of the resulting taxon in the taxon list does not yield a value, since both
/// are unwrapped.
pub fn joinkmers(args: JoinKmers) -> errors::Result<()> {
    let mut records = csv::ReaderBuilder::new()
        .has_headers(false)
        .delimiter(b'\t')
        .from_reader(io::stdin());

    // Lock standard output once instead of on every written line.
    let stdout = io::stdout();
    let mut out = stdout.lock();

    // Load the taxonomy and derive the lookup structures from it.
    let taxa = taxon::read_taxa_file(args.taxon_file)?;
    let tree = taxon::TaxonTree::new(&taxa);
    let by_id = taxon::TaxonList::new(taxa);
    let rank_snapping = tree.snapping(&by_id, true);
    let valid_snapping = tree.snapping(&by_id, false);
    let calculator = tree::mix::MixCalculator::new(tree.root, &by_id, 0.95);

    // Aggregates one finished group and prints its result line.
    let mut write_group = |kmer: &str, tids: Vec<(TaxonId, f32)>| -> io::Result<()> {
        let counts = agg::count(tids.into_iter());
        match calculator.aggregate(&counts) {
            Ok(aggregate) => {
                let taxon = rank_snapping[aggregate].unwrap();
                let rank = by_id.get_or_unknown(taxon).unwrap().rank;
                writeln!(out, "{}\t{}\t{}", kmer, taxon, rank)
            }
            // A group that cannot be aggregated is dropped on purpose.
            Err(_) => Ok(()),
        }
    };

    let mut group_kmer: Option<String> = None;
    let mut group_tids = Vec::new();
    for row in records.deserialize() {
        let (kmer, tid): (String, TaxonId) = row?;

        // A k-mer differing from the one being collected (or the very first
        // record) opens a new group; the previous group, if any, is flushed.
        if group_kmer.as_ref() != Some(&kmer) {
            if let Some(previous) = group_kmer.replace(kmer) {
                write_group(&previous, group_tids)?;
            }
            group_tids = Vec::new();
        }

        if let Some(ancestor) = valid_snapping[tid] {
            group_tids.push((ancestor, 1.0));
        }
    }

    // The last group is still pending once the input is exhausted.
    if let Some(last) = group_kmer {
        write_group(&last, group_tids)?;
    }
    Ok(())
}