use std::io;
use crate::errors;
use crate::taxon::TaxonId;
// Command-line arguments accepted by `splitkmers`, parsed through StructOpt.
// NOTE(review): plain `//` comments are used here on purpose. StructOpt turns
// `///` doc comments on the struct and its fields into help text, so adding
// them would change what the program prints for `--help`.
#[rustfmt::skip]
#[derive(Debug, StructOpt)]
#[structopt(verbatim_doc_comment)]
pub struct SplitKmers {
// Size, in bytes, of each k-mer window cut from a sequence.
// Flags: `-k` / `--length`; defaults to 9.
#[structopt(short = "k", long = "length", default_value = "9")]
pub length: usize,
// Optional prefix filter. Flags: `-p` / `--prefix`; defaults to the empty string.
// `splitkmers` only looks at the FIRST byte of this value: when present, only
// k-mers starting with that byte are written, with that byte removed.
#[structopt(short = "p", long = "prefix", default_value = "")]
pub prefix: String,
}
/// Splits every sequence read from standard input into overlapping k-mers.
///
/// Input is read as tab-separated, header-less records that deserialize into a
/// `(TaxonId, String)` pair. For each record, every window of `args.length`
/// bytes of the sequence is written to standard output as a tab-separated
/// `(k-mer, taxon id)` row. Sequences shorter than `args.length` produce no
/// output.
///
/// If `args.prefix` is non-empty, only its first byte is used: windows that do
/// not start with that byte are dropped, and the matching byte is removed from
/// the windows that are written.
///
/// Windows are taken over bytes, not characters; a window that is not valid
/// UTF-8 is written after lossy conversion.
///
/// # Errors
///
/// Returns an error when a record cannot be deserialized or a row cannot be
/// serialized to the output.
///
/// # Panics
///
/// Panics if `args.length` is zero, because `slice::windows` does not accept a
/// window size of zero.
pub fn splitkmers(args: SplitKmers) -> errors::Result<()> {
    let k = args.length;
    // Only the leading byte of the prefix takes part in the filtering.
    let wanted_first = args.prefix.bytes().next();

    let mut input = csv::ReaderBuilder::new()
        .has_headers(false)
        .delimiter(b'\t')
        .from_reader(io::stdin());
    let mut output = csv::WriterBuilder::new()
        .delimiter(b'\t')
        .from_writer(io::stdout());

    for row in input.deserialize() {
        let (taxon, peptide): (TaxonId, String) = row?;
        let bytes = peptide.as_bytes();
        if bytes.len() < k {
            continue;
        }
        for window in bytes.windows(k) {
            // Decide which part of the window (if any) gets written.
            let emitted = match wanted_first {
                None => window,
                Some(first) if window[0] == first => &window[1..],
                Some(_) => continue,
            };
            output.serialize((String::from_utf8_lossy(emitted), taxon))?;
        }
    }
    Ok(())
}