extern crate noodles;
use std::collections::BTreeMap;
use kmers::BigSparseAccumulator;
/// Prints a k-mer frequency histogram for the FASTA file named on the command line.
///
/// Expects exactly one argument: the path of a FASTA file. Every record's sequence is
/// fed to a `BigSparseAccumulator` (k = 15), and the resulting frequencies are tallied
/// into a histogram that is written to stdout as `frequency<TAB>count` lines, in
/// ascending order of frequency.
///
/// # Errors
///
/// Returns any I/O error raised while opening the file or reading a record.
fn main() -> std::io::Result<()> {
    let args = Vec::from_iter(std::env::args());
    if args.len() != 2 {
        println!("to run this example, download a FASTA file such as the bacterial genome at");
        println!("    https://www.ncbi.nlm.nih.gov/nuccore/NZ_CP113114.1?report=fasta");
        println!("then invoke the code in the following manner:");
        println!("    cargo run --example count_fasta -- NZ_CP113114.1.fasta");
        // Stop here: without exactly one path argument there is nothing to process, and
        // falling through would index `args[1]` out of bounds when no argument was given.
        return Ok(());
    }

    let k = 15;
    let buffer_size = 1024 * 1024;

    let mut reader = noodles::fasta::reader::Builder.build_from_path(&args[1])?;

    let mut acc = BigSparseAccumulator::new(k, buffer_size);
    for res in reader.records() {
        let rec = res?;
        // NOTE(review): `add_both` presumably counts k-mers from both strands — confirm
        // against the `kmers` crate documentation.
        acc.add_both(rec.sequence());
    }

    // Histogram keyed by frequency; the value is how many accumulator entries have that
    // frequency. A BTreeMap keeps the keys sorted for the output loop below.
    let mut hist: BTreeMap<_, u64> = BTreeMap::new();
    for (_x, f) in acc.kmer_frequencies() {
        *hist.entry(f).or_insert(0) += 1;
    }

    for (f, c) in &hist {
        println!("{}\t{}", f, c);
    }
    Ok(())
}