use std::sync::Arc;
use anyhow::Result;
use binseq::prelude::*;
use clap::Parser;
use memchr::memmem::Finder;
use parking_lot::Mutex;
#[derive(Clone)]
pub struct GrepCounter {
local_count: usize,
pattern: Finder<'static>,
count: Arc<Mutex<usize>>,
}
impl GrepCounter {
#[must_use]
pub fn new(pattern: &[u8]) -> Self {
Self {
pattern: Finder::new(pattern).into_owned(),
local_count: 0,
count: Arc::new(Mutex::new(0)),
}
}
fn match_sequence(&self, seq: &[u8]) -> bool {
self.pattern.find(seq).is_some()
}
fn pprint(&self) {
println!("Matching records: {}", self.count.lock());
}
}
impl ParallelProcessor for GrepCounter {
fn process_record<R: binseq::BinseqRecord>(&mut self, record: R) -> binseq::Result<()> {
if self.match_sequence(&record.sseq()) || self.match_sequence(&record.xseq()) {
self.local_count += 1;
}
Ok(())
}
fn on_batch_complete(&mut self) -> binseq::Result<()> {
*self.count.lock() += self.local_count;
self.local_count = 0;
Ok(())
}
}
#[derive(Parser)]
struct Args {
#[clap(required = true)]
input: String,
#[clap(required = true)]
pattern: String,
#[clap(short = 'T', long, default_value_t = 0)]
threads: usize,
}
fn main() -> Result<()> {
let args = Args::parse();
let reader = BinseqReader::new(&args.input)?;
let counter = GrepCounter::new(args.pattern.as_bytes());
reader.process_parallel(counter.clone(), args.threads)?;
counter.pprint();
Ok(())
}